* ---------------------------------------------------------------------------
* Load the raw flow table and reduce it to one row per (src_kreis, dst_kreis).
* Output: temp.dta
* ---------------------------------------------------------------------------
set more off
clear all

insheet using germany_table_11.csv, clear

* Upper-case the micro-region names on both sides of the flow
foreach v of varlist src_kreis dst_kreis {
    replace `v'=upper(`v')
}

* Second copy of page and threshold: the collapse below takes the minimum of
* the originals and the maximum of the copies
foreach v in page threshold {
    gen `v'_1=`v'
}

* Rows whose source or destination NUTS code is UBKR1 or UBKR2 are excluded here
drop if inlist(dst_nuts,"UBKR1","UBKR2")|inlist(src_nuts,"UBKR1","UBKR2")

collapse (mean) flow (first) src_nuts dst_nuts (min) page threshold ///
    (max) page_1 threshold_1, by(src_kreis dst_kreis)

* Keep a single threshold per pair: the larger of the min- and max-collapsed values
egen threshold_2=rowmax(threshold threshold_1)
drop threshold threshold_1
rename threshold_2 threshold
save temp, replace



* Lookup table of destination micro-regions: name (kreis), NUTS code (nuts)
* and a numeric code stored under both key names. Output: pair_nuts.dta
use temp, clear
keep dst_nuts dst_kreis
duplicates drop
rename dst_nuts nuts
rename dst_kreis kreis
* encode numbers the distinct names in alphabetical order
encode kreis, generate(code_dst_kreis)
* Same number under the source-side key name, so the table can be joined on either key
gen code_src_kreis=code_dst_kreis
save pair_nuts, replace



* Create all possible pairwise combinations between micro-regions and attach
* the reported flow (missing where no flow is reported). Output: pair_flow.dta
use temp, clear

* Encode destinations first: pair_nuts is coded from these same dst_kreis
* values, so code_dst_kreis agrees with the codes stored there.
encode dst_kreis, generate(code_dst_kreis)

* Encode sources with the destination value label. A stand-alone encode of
* src_kreis numbers the names by alphabetical rank within the source list,
* which coincides with the destination coding only when both lists contain
* exactly the same names; otherwise the joins below would attach the wrong
* region. With label() every name shared by both lists gets one code, and a
* name that occurs only as a source is appended after the existing codes.
encode src_kreis, generate(code_src_kreis) label(code_dst_kreis)

keep flow code_src_kreis code_dst_kreis

* Wide then long rectangularises the data: one row per source for every
* destination code present in the data
reshape wide flow, i(code_src_kreis) j(code_dst_kreis)
reshape long flow, i(code_src_kreis) j(code_dst_kreis)

* Attach the NUTS code of the source region
joinby code_src_kreis using pair_nuts, unmatched(both)
tab _merge
drop _merge
rename nuts src_nuts

* Attach the NUTS code of the destination region
joinby code_dst_kreis using pair_nuts, unmatched(both)
rename nuts dst_nuts
tab _merge
drop _merge kreis
save pair_flow, replace



* Create all possible pairwise combinations between micro-regions and give
* every pair the largest threshold observed for its source region.
* Output: pair_threshold.dta (temp2.dta is written as an intermediate file)
use temp, clear

* Encode destinations first: pair_nuts is coded from these same dst_kreis
* values, so code_dst_kreis agrees with the codes stored there.
encode dst_kreis, generate(code_dst_kreis)

* Encode sources with the destination value label. A stand-alone encode of
* src_kreis numbers the names by alphabetical rank within the source list,
* which coincides with the destination coding only when both lists contain
* exactly the same names; otherwise the joins below would attach the wrong
* region. With label() every name shared by both lists gets one code, and a
* name that occurs only as a source is appended after the existing codes.
encode src_kreis, generate(code_src_kreis) label(code_dst_kreis)

keep threshold code_src_kreis code_dst_kreis

* Wide then long rectangularises the data: one row per source for every
* destination code present in the data
reshape wide threshold, i(code_src_kreis) j(code_dst_kreis)
reshape long threshold, i(code_src_kreis) j(code_dst_kreis)

* Attach the NUTS code of the source region
joinby code_src_kreis using pair_nuts, unmatched(both)
tab _merge
drop _merge
rename nuts src_nuts

* Attach the NUTS code of the destination region
joinby code_dst_kreis using pair_nuts, unmatched(both)
rename nuts dst_nuts
tab _merge
drop _merge kreis
save temp2, replace

* Largest threshold per source region, spread back over all of its pairs
collapse (max) threshold, by(code_src_kreis)
rename threshold threshold2
joinby code_src_kreis using temp2, unmatched(both)
drop threshold _merge
rename threshold2 threshold
save pair_threshold, replace



* Combine thresholds and flows on the pair grid, then fill in unreported flows
* between different micro-regions of the same NUTS region. Output: temp2.dta
use pair_threshold, clear
joinby code_dst_kreis code_src_kreis using pair_flow, unmatched(both)
tab _merge
drop _merge

* First three characters of the NUTS code
gen src_land=substr(src_nuts,1,3)
gen dst_land=substr(dst_nuts,1,3)

* Indicators for pairs sharing the micro-region, the 3-character prefix, or the NUTS code
gen same_kreis=(code_dst_kreis==code_src_kreis)
gen same_land=(dst_land==src_land)
gen same_nuts=(dst_nuts==src_nuts)

* ubkr flags pairs in the same NUTS region, between different micro-regions,
* for which no flow is reported; their flow is set to threshold minus one
gen ubkr=(same_nuts==1&same_kreis!=1&flow==.)
replace flow=(threshold-1) if ubkr==1
save temp2, replace



* Compute inflows and outflows per micro-region from temp2, separately for
*   ubkr  : pairs flagged ubkr==1 (same NUTS region, flow restored from the threshold)
*   other : pairs whose source and destination NUTS codes differ
* Outflows are summed by source region, inflows by destination region.
* Output files, in this order: ubkr_outflow, other_outflow, ubkr_inflow, other_inflow
foreach side in src dst {
    if "`side'"=="src" {
        local dir "outflow"
    }
    else {
        local dir "inflow"
    }
    foreach kind in ubkr other {
        if "`kind'"=="ubkr" {
            local sel "ubkr==1"
        }
        else {
            local sel "same_nuts!=1"
        }
        use temp2, clear
        collapse (sum) flow (first) `side'_nuts if `sel', by(code_`side'_kreis)
        rename flow `kind'_`dir'
        rename code_`side'_kreis code_kreis
        rename `side'_nuts nuts
        save `kind'_`dir', replace
    }
}


* One row per micro-region code from pair_nuts with the four sums attached;
* regions absent from a sum file get 0. Output: ubkr_other_flow.dta
use pair_nuts, clear
keep nuts code_dst_kreis
rename code_dst_kreis code_kreis

foreach f in ubkr_outflow other_outflow ubkr_inflow other_inflow {
    joinby code_kreis using `f', unmatched(both)
    drop _merge
}

foreach v of varlist ubkr_outflow ubkr_inflow other_outflow other_inflow {
    replace `v'=0 if `v'==.
}
save ubkr_other_flow, replace


*** Use data for Uebrige Kreise: reload the raw table and keep only rows whose
*** encoded source or destination name equals 545. Output: temp3.dta
insheet using germany_table_11.csv, clear
foreach v of varlist src_kreis dst_kreis {
    replace `v'=upper(`v')
}
foreach side in src dst {
    encode `side'_kreis, generate(code_`side'_kreis)
}

* NOTE(review): 545 is a hard-coded encode value. encode numbers names by
* alphabetical rank, so the number depends on the exact set of names in the
* file; presumably it is the Uebrige Kreise entry -- confirm against the data.
* NOTE(review): these codes are later joined with the codes in pair_nuts,
* which are built after the UBKR rows were removed; the two codings agree
* only if the extra name sorts after all others -- TODO confirm.
drop if code_dst_kreis!=545&code_src_kreis!=545

* Unreported flows count as zero
replace flow=0 if flow==.
save temp3, replace


* Split temp3 into four files, one per direction and UBKR category:
*   outflow : rows whose destination NUTS code is UBKR1/UBKR2, keyed by the source region
*   inflow  : rows whose source NUTS code is UBKR1/UBKR2, keyed by the destination region
* Output files, in this order: ubkr1_outflow, ubkr2_outflow, ubkr1_inflow, ubkr2_inflow
foreach dir in outflow inflow {
    if "`dir'"=="outflow" {
        local region "src"
        local partner "dst"
    }
    else {
        local region "dst"
        local partner "src"
    }
    forvalues k=1/2 {
        use temp3, clear
        keep if `partner'_nuts=="UBKR`k'"
        rename `region'_nuts nuts
        rename code_`region'_kreis code_kreis
        rename flow ubkr`k'_`dir'
        keep nuts code_kreis ubkr`k'_`dir'
        save ubkr`k'_`dir', replace
    }
}


* One row per micro-region code from pair_nuts with the four UBKR1/UBKR2
* flows attached; regions absent from a file get 0. Output: ubkr1_ubkr2_flow.dta
use pair_nuts, clear
keep nuts code_dst_kreis
rename code_dst_kreis code_kreis

foreach f in ubkr1_outflow ubkr2_outflow ubkr1_inflow ubkr2_inflow {
    joinby code_kreis using `f', unmatched(both)
    drop _merge
}

foreach v of varlist ubkr1_outflow ubkr1_inflow ubkr2_outflow ubkr2_inflow {
    replace `v'=0 if `v'==.
}
save ubkr1_ubkr2_flow, replace




*** Merge both data files and compute inflows and outflows across NUTS2 borders.
*** Output: total_flows.dta (written once with the flows, then again with population added)
use ubkr1_ubkr2_flow, clear
joinby code_kreis using ubkr_other_flow, unmatched(both)
drop _merge

* The flows restored from thresholds for the same NUTS2 (ubkr_inflow, ubkr_outflow)
* are netted out of the reported UBKR1 flow. If the restored amount is larger than
* the reported UBKR1 flow, the whole UBKR1 flow is assumed to take place within
* the same NUTS2, so the remainder is set to 0.
* The total adds the remaining UBKR1 flow, the UBKR2 flow and the explicitly
* reported flows between different NUTS regions.
foreach dir in inflow outflow {
    replace ubkr1_`dir'=(ubkr1_`dir'>=ubkr_`dir')*(ubkr1_`dir'-ubkr_`dir')
    gen total_`dir'=ubkr1_`dir'+ubkr2_`dir'+other_`dir'
}
collapse (sum) total_inflow total_outflow, by(nuts)

save total_flows, replace

* Add population, summed by NUTS region
* NOTE(review): option n is presumably the abbreviation of names -- confirm
insheet using germany_population.csv, clear n
* force: entries that cannot be read as numbers become missing
destring population, replace force
collapse (sum) population, by(nuts)
joinby nuts using total_flows, unmatched(both)
drop _merge
save total_flows, replace
